//this dofile does some basic cleaning and produces a firm x month x district level dataset 

* Session setup: start from an empty dataset and configure Stata
clear
* NOTE(review): set memory is presumably a no-op on Stata 12 and later - confirm against the Stata version in use
set memory 500m
set matsize 100
* Do not pause output with --more-- prompts
set more off 
* From here on every command and every star comment ends at a semicolon
# delimit;

	
* Set directory (the relative ../dta_secure paths used below are resolved from here); 

cd "~/Desktop/migrec_replication/do/";

* Load the input dataset that the rest of this file transforms;

	use ../dta_secure/final.dta, clear;


**  attach the complaint file to the loaded records, many-to-one on ppno; 

	merge m:1 ppno using ../dta_secure/complaints_uniq.dta, force gen(complmerge);

*   retain master-only (1) and matched (3) rows, which discards rows found only in the complaint file;
	keep if inlist(complmerge, 1, 3); 

** first isolate variables needed for data set; 

* date; 
* NOTE(review): if departure_year and departure_month are numeric this is an arithmetic sum and not a calendar date;
* NOTE(review): if they are strings it is a concatenation - confirm intent. date is not listed in the collapse further down this file;

	gen date = departure_year+departure_month; 

* demographics; 
* migrant is a constant 1 so that summing it counts records;

	gen migrant  = 1; 
	gen men = (sex == "M"); 
	gen women = (sex == "F");

* age bands;
* Stata orders numeric missing above every number, so the open-ended top band;
* must exclude missing ages explicitly or they would be counted as 50 and over;

	gen age1 = (current_age < 30); 
	gen age2 = (current_age >= 30 & current_age < 40); 
	gen age3 = (current_age >= 40 & current_age < 50); 
	gen age4 = (current_age >= 50 & !missing(current_age)); 

	* skill indicators, one per value of jobcode_skill_anf handled here;

	foreach lvl in low medium high {;
		gen `lvl'_skill = (jobcode_skill_anf == "`lvl'");
	};

	* destination indicators built from cn_name;
	* saudi and uae are written out because their variable names are not the lower-cased country name;
	* qatar is written out as well so that variables are created in the same order as before;

	gen saudi = (cn_name == "Saudi Arabia");
	gen qatar = (cn_name == "Qatar");
	gen uae = (cn_name == "U A E");

	foreach dest in Kuwait Jordan Bahrain Lebanon Malaysia {;
		local v = lower("`dest'");
		gen `v' = (cn_name == "`dest'");
	};
	
	* indicators for jobcode_man_lev codes 1 through 7, with the suffix list given in code order;

	local code = 0;
	foreach lev in professional skilled semiskill middle clerical unskilled dw {;
		local ++code;
		gen manlev_`lev' = (jobcode_man_lev == `code');
	};
	
		* destring is asked to turn cn_code numeric in place, then one cn_num_ indicator is built per distinct value reported by levelsof;
		destring cn_code, replace;
	levelsof cn_code, local(cn_levels);
	foreach code of local cn_levels {;
		generate cn_num_`code' = (cn_code == `code');
	};

	* indicator for records with job_code_domestic equal to 1;
	generate domestic_worker = (job_code_domestic == 1);

** dominant destination country of each agency; 
** shares are computed over all records of an agency, pooled across departure months;

	local destinations saudi qatar uae kuwait jordan bahrain lebanon malaysia;

*	number of records per agency (migrant is 1 on every record);
	bysort agency_id: egen mig_tot = total(migrant); 

*	share of each agency's records going to each destination, with the count itself discarded once the share is stored;
	foreach c of varlist `destinations' {; 

		bysort agency_id: egen `c'_tot = total(`c'); 
		gen `c'_frac = `c'_tot/mig_tot; 
		drop `c'_tot ; 

	}; 

*	country_50 names the destination holding more than half of the records, and stays empty when none does;
	gen country_50 = "" ;

	foreach c of varlist `destinations' {; 
		gen cn_major_`c'_50 = (`c'_frac > .5); 
		replace country_50 = "`c'" if cn_major_`c'_50 == 1;
	}; 
	

** flag agencies for which more than half of all records are domestic work; 

	by agency_id, sort: egen domestic_tot = total(domestic_worker); 

	gen domestic_frac = domestic_tot/mig_tot; 	
	gen domestic_50 = (domestic_frac > .5); 

** mark one record per agency x departure year x departure month x village so that summing the marker counts distinct villages; 

	egen vil_rec = tag(agency_id departure_year departure_month vil_id);

** aggregate to one row per district x departure year x departure month x agency; 
** the agency-level flags take their first value within each cell and every other variable is summed;

	collapse
		(first) cn_major_*_50 domestic_50 country_50
		(sum) vil_rec* usd_salary usd_salary_d
			age1 age2 age3 age4
			low_skill medium_skill high_skill
			saudi qatar uae kuwait jordan
			manlev_*
			bahrain lebanon malaysia
			migrant complaints breach nonpay harass death domestic_worker
			men women cn_num_*
		, by(vil_dis_code departure_year departure_month agency_id) ;

** describe the summed variables;

	label variable age1 "less than 30"; 
	label variable age2 "30 to 40"; 
	label variable age3 "40 to 50"; 
	label variable age4 "more than 50"; 
	label variable breach "breach of contract complaints"; 
	label variable nonpay "non payment of wages complaints"; 
	label variable harass "harassment complaints"; 
	label variable death "deaths";
	label variable usd_salary "sum of salaries (usd)"; 
	label variable usd_salary_d "sum of salaries (usd) - constant 2015 usd"; 

	label variable vil_rec "unique villages of recruitment";



** write the collapsed dataset to disk, overwriting any earlier copy;
save ../dta_secure/1_firm_mn_district.dta, replace;


